/*****************************************************************************
*Purpose: Calculate pairwise and false discovery rate adjusted p-values
******************************************************************************/ 

/*

*** outcomes for children

Earnings at age 24-28
Owner
Later-life share black
Married
Mortality (Appendix)
Incarceration (Appendix)

*/

clear
set obs 6

gen est = .
gen err = .

* earnings
replace est = 2341 if _n==1
replace err = 626 if _n==1

* most imprecise estimate
*replace est = 2425 if _n==1
*replace err = 1126 if _n==1

* owner
replace est = 0.0999 if _n==2 
replace err = 0.049 if _n==2

* later-life share black
replace est = -.098 if _n==3
replace err = 0.020 if _n==3

* married
replace est = 0.069 if _n==4
replace err = 0.028 if _n==4

* mortality
replace est = -0.004 if _n==5
replace err = 0.211 if _n==5

* incarceration
replace est = -.003 if _n==6
replace err = 0.009 if _n==6

* create p-values
gen pval = ttail(38,abs(est/err))*2

keep pval

* below is code from Anderson (2008)

quietly sum pval
local totalpvals = r(N)

* Sort the p-values in ascending order and generate a variable that codes each p-value's rank

quietly gen int original_sorting_order = _n
quietly sort pval
quietly gen int rank = _n if pval != .

* Set the initial counter to 1 

local qval = 1

* Generate the variable that will contain the BH (1995) q-values

gen bh95_qval = 1 if pval != .

* Set up a loop that begins by checking which hypotheses are rejected at q = 1.000, then checks which hypotheses are rejected at q = 0.999, then checks which hypotheses are rejected at q = 0.998, etc.  The loop ends by checking which hypotheses are rejected at q = 0.001.

while `qval' > 0 {
	* Generate value qr/M
	quietly gen fdr_temp = `qval' * rank / `totalpvals'
	* Generate binary variable checking condition p(r) <= qr/M
	quietly gen reject_temp = ( fdr_temp >= pval ) if fdr_temp != .
	* Generate variable containing p-value ranks for all p-values that meet above condition
	quietly gen reject_rank = reject_temp * rank
	* Record the rank of the largest p-value that meets above condition
	quietly egen total_rejected = max( reject_rank )
	* A p-value has been rejected at level q if its rank is less than or equal to the rank of the max p-value that meets the above condition
	replace bh95_qval = `qval' if rank <= total_rejected & rank != .
	* Reduce q by 0.001 and repeat loop
	quietly drop fdr_temp reject_temp reject_rank total_rejected
	local qval = `qval' - .001
}
	
quietly sort original_sorting_order
pause off
set more on

display "Code has completed."
display "Benjamini Hochberg (1995) q-vals are in variable 'bh95_qval'"
display	"Sorting order is the same as the original vector of p-values"




